clear; clc; close all

% Splits each imputed data file into a train part and a test part, and writes
% a second copy of the test part whose first character on every line is
% replaced by the corresponding entry of a separate labels file.
%
% For every file family in `fnames` and every index 1..nFiles this script:
%   1. reads the labels file and the imputed data file line by line,
%   2. treats the last `nTest` lines of the data file as test data and all
%      preceding lines as training data,
%   3. writes the train and test parts to '_Imputation_Train_' and
%      '_Imputation_Test_' files,
%   4. for '_HD_' files, additionally writes the first `nCDS_train` training
%      lines to a '_TrainCDS_TestCDS_Imputation_Train_' file,
%   5. writes '<test file>_trueTestLabels.txt' with the relabelled test lines.
%
% NOTE(review): `dlmcell` is not defined in this file; it is presumably an
% external helper that writes a cell array to a text file - confirm it is on
% the MATLAB path.

% Named `dataDir` rather than `dir` so that the built-in `dir` function is not
% shadowed in the workspace after this script has run.
dataDir = 'C:\Users\Mittal\Documents\Work\Code\survival-analysis\perl\LR_CDS_NTDB_TrainCDS+NTDB_TestCDS_Imputed\35-65\';

nFiles = 10;

% CDS_keys = '1,2,3,4

% Each row is {prefix, suffix}; the file index is inserted between the two.
fnames = {
    'CDS_NTDB_TrainCDS+NTDB_TestCDS_Imputation_HD_CDS_1_NTDB_1_35-65_','_mice_imputed_0.txt'
    'CDS_NTDB_TrainCDS_TestCDS_Imputation_LD_CDS_1_NTDB_1_35-65_','_mice_imputed_0.txt'
    };

% {prefix, suffix} of the labels file that belongs to file index i.
fnames_trueTestLabels = {'CDS_NTDB_TrainCDS+NTDB_TestCDS_Output_Test_CDS_1_NTDB_1_35-65_','.txt'};

% Number of trailing lines in every data file that form the test set.
nTest = 4964;
%fracCDS = 1.0; %fraction of CDS data in training data
% Number of leading training lines kept for the HD "CDS only" training file.
% NOTE(review): 7638 is presumably the total number of CDS records - confirm.
nCDS_train = 7638 - nTest;

for f = 1:size(fnames,1),
    for i = 1:nFiles,
        % --- read the labels for file index i (one label per line) ---
        fname_trueTestLabels = [fnames_trueTestLabels{1} num2str(i) fnames_trueTestLabels{2}];
        fid = fopen([dataDir fname_trueTestLabels]);
        if fid == -1
            error('splitImputed:fileOpen','Could not open labels file: %s',[dataDir fname_trueTestLabels]);
        end
        labels = textscan(fid,'%s','delimiter','\n');
        labels = labels{1};
        fclose(fid);

        % --- read the imputed data file (one record per line) ---
        fname = [fnames{f,1} num2str(i) fnames{f,2}];
        fprintf('Processing file... %s\n',fname);
        fnamepath = [dataDir fname];

        fid = fopen(fnamepath);
        if fid == -1
            error('splitImputed:fileOpen','Could not open data file: %s',fnamepath);
        end
        p = textscan(fid,'%s','delimiter','\n');
        p = p{1};
        fclose(fid);

        isHD = ~isempty(strfind(fname,'_HD_'));

        % --- derive the output file names ---
        if(isempty(strfind(fname,'_Imputation_')))
            fprintf('substring not found in %s\n. 1. Exiting...',fname);
            return;
        end
        fname_testOnly = strrep(fname,'_Imputation_','_Imputation_Test_');
        fname_train = [dataDir strrep(fname,'_Imputation_','_Imputation_Train_')];
        fname_test = [dataDir fname_testOnly];
        % The suffix replacement is applied to the file name only, not to the
        % directory part of the path.
        fname_test_trueLabels = [dataDir strrep(fname_testOnly,'.txt','_trueTestLabels.txt')];

        if(isHD)
            if(isempty(strfind(fname,'_TrainCDS+NTDB_TestCDS_Imputation_')))
                fprintf('substring not found in %s\n. 2. Exiting...',fname);
                return;
            end
            fname_train_CDSNTDB = [dataDir strrep(fname,'_TrainCDS+NTDB_TestCDS_Imputation_','_TrainCDS_TestCDS_Imputation_Train_')];
        end

        % --- validate sizes before anything is written to disk ---
        if numel(p) < nTest
            error('splitImputed:tooFewLines', ...
                'File %s has %d lines, fewer than nTest = %d.',fname,numel(p),nTest);
        end
        if numel(labels) < nTest
            error('splitImputed:tooFewLabels', ...
                'Labels file %s has %d lines, fewer than nTest = %d.', ...
                fname_trueTestLabels,numel(labels),nTest);
        end
        if isHD && (numel(p) - nTest) < nCDS_train
            error('splitImputed:tooFewTrainLines', ...
                'File %s has %d training lines, fewer than nCDS_train = %d.', ...
                fname,numel(p)-nTest,nCDS_train);
        end

        % --- split: everything except the last nTest lines is training data ---
        trainLines = p(1:length(p)-nTest,1);
        testLines = p(end-nTest+1:end,1);

        if(isHD)
            % First nCDS_train training lines go to the separate HD output.
            trainLines_CDSNTDB = trainLines(1:nCDS_train,1);
            dlmcell(fname_train_CDSNTDB,trainLines_CDSNTDB);
        end

        dlmcell(fname_train,trainLines);
        dlmcell(fname_test,testLines);

        % Overwrite the first character of every test line with its label
        % from the labels file, then write the relabelled copy.
        for j = 1:length(testLines),
            testLines{j}(1) = labels{j};
        end

        dlmcell(fname_test_trueLabels,testLines);
    end
end